In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split  # Optional, if splitting manually not used
from sklearn.metrics import (mean_squared_error,mean_absolute_error,mean_absolute_percentage_error,r2_score)
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense, Dropout, Embedding, Flatten, Concatenate, BatchNormalization)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import os 
import random
In [4]:
def set_seed(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)        
    random.seed(seed)                               
    np.random.seed(seed)                            
    tf.random.set_seed(seed)                        

set_seed(42)

Overnights 2024 LR=0.001 Model 3¶

In [232]:
# Load and clean data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
lag_cols = [col for col in cool.columns if 'lag' in col]
cool = cool.drop(columns=lag_cols, errors='ignore')
cool = cool[~cool['country'].isin(['Russian Federation'])]
In [233]:
# Convert types
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)
cool['euro_adopted'] = cool['euro_adopted'].astype(int)

# Merge Google Trends data
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Sort and log-transform
cool = cool.sort_values(['country', 'date']).reset_index(drop=True)
cool = cool.drop_duplicates(subset=['country', 'date'], keep='first').reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals']) 
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])
cool['overnights_next_month'] = cool.groupby('country')['overnights'].shift(-1)

# One-hot encode month
month_names = {i: month for i, month in enumerate(['January','February','March','April','May','June','July','August','September','October','November','December'], 1)}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool, ohe_month], axis=1).drop(columns=['month_name'])

# Create lags
lags = {'arrivals': [1, 3, 6, 12], 'overnights': [1, 3, 6, 12], 'cpi': [1], 'unemployment_rate': [3], 'google_trends': [1, 3]}
for var, steps in lags.items():
    for lag in steps:
        cool[f'{var}_lag_{lag}'] = cool.groupby('country')[var].shift(lag)

cool
# Step 1: Define Schengen entry years
schengen_entry_year = {
    'Austria': 1995,
    'Belgium': 1995,
    'Czech Republic': 2007,
    'Denmark': 2001,
    'Finland': 1996,
    'France': 1995,
    'Germany': 1995,
    'Hungary': 2007,
    'Italy': 1997,
    'Netherlands': 1995,
    'Norway': 2001,
    'Poland': 2007,
    'Portugal': 1995,
    'Slovakia': 2007,
    'Slovenia': 2007,
    'Spain': 1995,
    'Sweden': 2001,
    'Switzerland': 2008,
    'Romania': 2024,}

def is_schengen_member(row):
    entry_year = schengen_entry_year.get(row['country'], np.inf)
    return int(row['year'] >= entry_year)

cool['schengen_member'] = cool.apply(is_schengen_member, axis=1).astype('int8')

# Filter and drop missing
cool.isna().sum().sort_values(ascending=False) 
cool = cool[cool['date'] >= '2001-01-01']
cool = cool[cool['overnights_next_month'].notna()].copy()
In [234]:
# Label encode country and scale numeric features
cool['country_encoded'] = LabelEncoder().fit_transform(cool['country'])
month_cols = [col for col in cool.columns if col.startswith('month_')]
X_numeric = cool[[
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member','euro_adopted'] + month_cols].values
X_country_array = cool['country_encoded'].astype('int32').values
y = cool['overnights_next_month'].values
In [235]:
# Time-based split
train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool['date'] <= train_end
val_mask = (cool['date'] > train_end) & (cool['date'] <= val_end)
test_mask = cool['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)  

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]

# Define model3
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
n_countries = cool['country_encoded'].nunique()
embedding = Embedding(input_dim=n_countries, output_dim=10)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)
model3 = Model(inputs=[input_numeric, input_country], outputs=output)
model3.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model3.summary()

# Train model3
early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
history = model3.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16, callbacks=[early_stop]
)
Model: "functional_26"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_26        │ (None, 1, 10)     │        230 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_26          │ (None, 10)        │          0 │ embedding_26[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_26      │ (None, 35)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_26[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_78 (Dense)    │ (None, 64)        │      2,304 │ concatenate_26[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_78[0][0]    │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_52          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_79 (Dense)    │ (None, 32)        │      2,080 │ dropout_52[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_79[0][0]    │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_53          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_80 (Dense)    │ (None, 1)         │         33 │ dropout_53[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 5,031 (19.65 KB)
 Trainable params: 4,839 (18.90 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 28s 11ms/step - loss: 89.2403 - val_loss: 35.5990
Epoch 2/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - loss: 24.8093 - val_loss: 2.2839
Epoch 3/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 4.6342 - val_loss: 1.7292
Epoch 4/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 3.3075 - val_loss: 1.7541
Epoch 5/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 2.9630 - val_loss: 1.6112
Epoch 6/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.7032 - val_loss: 1.5946
Epoch 7/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.4437 - val_loss: 1.5346
Epoch 8/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 2.0947 - val_loss: 1.5173
Epoch 9/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - loss: 1.9937 - val_loss: 1.5489
Epoch 10/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 10ms/step - loss: 1.8901 - val_loss: 1.5497
Epoch 11/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 9ms/step - loss: 1.7351 - val_loss: 1.6645
Epoch 12/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.6302 - val_loss: 1.5062
Epoch 13/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.6041 - val_loss: 1.5826
Epoch 14/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.4879 - val_loss: 1.6785
Epoch 15/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.4391 - val_loss: 1.5853
Epoch 16/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.4526 - val_loss: 1.5137
Epoch 17/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.3679 - val_loss: 1.5973
Epoch 18/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.4182 - val_loss: 1.5707
Epoch 19/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.3537 - val_loss: 1.6190
Epoch 20/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 14ms/step - loss: 1.2815 - val_loss: 1.6774
Epoch 21/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 8ms/step - loss: 1.2749 - val_loss: 1.6142
Epoch 22/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.2937 - val_loss: 1.5982
Epoch 23/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.2810 - val_loss: 1.6085
Epoch 24/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2816 - val_loss: 1.6001
Epoch 25/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2587 - val_loss: 1.6219
Epoch 26/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2451 - val_loss: 1.6651
Epoch 27/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1851 - val_loss: 1.6070
Epoch 28/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2446 - val_loss: 1.6157
Epoch 29/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1416 - val_loss: 1.6052
Epoch 30/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0938 - val_loss: 1.6009
Epoch 31/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1726 - val_loss: 1.6858
Epoch 32/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1481 - val_loss: 1.6473
Epoch 33/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.1703 - val_loss: 1.6512
Epoch 34/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1190 - val_loss: 1.5767
Epoch 35/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.1113 - val_loss: 1.5925
Epoch 36/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1510 - val_loss: 1.5767
Epoch 37/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0962 - val_loss: 1.6330
Epoch 38/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1153 - val_loss: 1.5236
Epoch 39/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1686 - val_loss: 1.5506
Epoch 40/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1147 - val_loss: 1.5003
Epoch 41/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1116 - val_loss: 1.6148
Epoch 42/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1468 - val_loss: 1.5725
Epoch 43/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.0854 - val_loss: 1.5362
Epoch 44/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0844 - val_loss: 1.6029
Epoch 45/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.0681 - val_loss: 1.5509
Epoch 46/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.0551 - val_loss: 1.5586
Epoch 47/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0376 - val_loss: 1.6248
Epoch 48/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0158 - val_loss: 1.5275
Epoch 49/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.0508 - val_loss: 1.5094
Epoch 50/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.0701 - val_loss: 1.5162
Epoch 51/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0496 - val_loss: 1.4887
Epoch 52/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.0185 - val_loss: 1.5744
Epoch 53/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0444 - val_loss: 1.4563
Epoch 54/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0266 - val_loss: 1.5447
Epoch 55/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9903 - val_loss: 1.5421
Epoch 56/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0146 - val_loss: 1.6193
Epoch 57/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.0453 - val_loss: 1.5150
Epoch 58/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0113 - val_loss: 1.4521
Epoch 59/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9473 - val_loss: 1.5612
Epoch 60/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.9989 - val_loss: 1.6867
Epoch 61/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9996 - val_loss: 1.4673
Epoch 62/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 0.9882 - val_loss: 1.5372
Epoch 63/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.9569 - val_loss: 1.4765
Epoch 64/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0222 - val_loss: 1.5254
Epoch 65/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9942 - val_loss: 1.5092
Epoch 66/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9570 - val_loss: 1.4974
Epoch 67/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9730 - val_loss: 1.5087
Epoch 68/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9471 - val_loss: 1.4377
Epoch 69/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9546 - val_loss: 1.5303
Epoch 70/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9070 - val_loss: 1.4746
Epoch 71/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9737 - val_loss: 1.5554
Epoch 72/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9728 - val_loss: 1.5023
Epoch 73/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0142 - val_loss: 1.5233
Epoch 74/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9267 - val_loss: 1.4812
Epoch 75/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9398 - val_loss: 1.4838
Epoch 76/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9100 - val_loss: 1.4992
Epoch 77/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8853 - val_loss: 1.4566
Epoch 78/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.9416 - val_loss: 1.4769
Epoch 79/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 10ms/step - loss: 0.8910 - val_loss: 1.5021
Epoch 80/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9447 - val_loss: 1.4970
Epoch 81/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9396 - val_loss: 1.5415
Epoch 82/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9037 - val_loss: 1.4931
Epoch 83/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8417 - val_loss: 1.4645
Epoch 84/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9084 - val_loss: 1.4984
Epoch 85/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8796 - val_loss: 1.4698
Epoch 86/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8927 - val_loss: 1.4486
Epoch 87/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9113 - val_loss: 1.5172
Epoch 88/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8731 - val_loss: 1.5109
Epoch 89/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8907 - val_loss: 1.4753
Epoch 90/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8842 - val_loss: 1.4480
Epoch 91/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8521 - val_loss: 1.4783
Epoch 92/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9067 - val_loss: 1.4194
Epoch 93/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8771 - val_loss: 1.4823
Epoch 94/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.8827 - val_loss: 1.4256
Epoch 95/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8614 - val_loss: 1.4351
Epoch 96/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.8507 - val_loss: 1.5070
Epoch 97/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8448 - val_loss: 1.4578
Epoch 98/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.8651 - val_loss: 1.5128
Epoch 99/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8209 - val_loss: 1.4792
Epoch 100/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8280 - val_loss: 1.4589
In [236]:
# Baseline predictions
baseline_preds = model3.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model3.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(max(0, permuted_mse - baseline_mse))

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model3.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = max(0, mean_squared_error(y_test, month_preds) - baseline_mse)
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 3: Overnights (2024)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
34/34 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step

Model Evaluation¶

In [238]:
## Model evalauation
# Predict and flatten
train_preds = model3.predict([X_num_train, X_cat_train]).flatten()
test_preds = model3.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse3 = np.sqrt(train_mse)
test_rmse3 = np.sqrt(test_mse)

#MAE
train_mae3 = mean_absolute_error(y_train, train_preds)
test_mae3 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape3 = mean_absolute_percentage_error(y_train, train_preds)
test_mape3 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_3 = r2_score(y_train, train_preds)
test_r2_3 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse3:.4f}, MAE: {train_mae3:.4f}, MAPE: {train_mape3:.4f}, R²: {train_r2_3:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse3:.4f}, MAE: {test_mae3:.4f}, MAPE: {test_mape3:.4f}, R²: {test_r2_3:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
138/138 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
Train MSE: 0.0999, RMSE: 0.3161, MAE: 0.2388, MAPE: 0.0260, R²: 0.9798
Test MSE: 0.3100, RMSE: 0.5568, MAE: 0.4334, MAPE: 0.0432, R²: 0.9311
In [ ]:
y_pred_all = model3.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Overnights (2024)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
207/207 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step
In [240]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Overnights per Country (2024)', fontsize=16, y=1.02)
plt.show()
In [241]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                   country           MSE          RMSE           MAE  \
0                  Austria  1.028618e+11  3.207207e+05  2.174073e+05   
1                  Belgium  2.086010e+09  4.567286e+04  2.272012e+04   
2   Bosnia and Herzegovina  8.145445e+09  9.025212e+04  4.634687e+04   
3                   Canada  1.774758e+08  1.332201e+04  8.498550e+03   
4           Czech Republic  6.500328e+10  2.549574e+05  1.214085e+05   
5                  Denmark  3.653635e+09  6.044530e+04  2.052040e+04   
6                  Finland  2.066050e+08  1.437376e+04  8.788207e+03   
7                   France  3.886561e+09  6.234229e+04  3.070490e+04   
8                  Germany  4.779323e+12  2.186166e+06  1.200339e+06   
9                  Hungary  1.688156e+10  1.299290e+05  6.355589e+04   
10                 Ireland  4.933720e+08  2.221198e+04  1.300958e+04   
11                   Italy  2.409223e+10  1.552167e+05  7.373700e+04   
12             Netherlands  2.083571e+10  1.443458e+05  6.193622e+04   
13                  Norway  1.445879e+09  3.802472e+04  1.362845e+04   
14                  Poland  1.666509e+11  4.082290e+05  2.289431e+05   
15                 Romania  5.463797e+08  2.337477e+04  1.230082e+04   
16                Slovakia  3.082129e+09  5.551692e+04  2.890555e+04   
17                Slovenia  1.659923e+11  4.074216e+05  2.047528e+05   
18                   Spain  1.214496e+08  1.102042e+04  6.626548e+03   
19                  Sweden  1.519215e+09  3.897710e+04  1.818992e+04   
20             Switzerland  4.152904e+09  6.444303e+04  3.543848e+04   
21                     USA  7.544876e+09  8.686125e+04  6.149310e+04   
22          United Kingdom  4.076366e+10  2.019001e+05  1.295231e+05   

        MAPE        R2  
0   0.576117  0.776627  
1   0.632651  0.806398  
2   0.370813  0.745085  
3   0.440671  0.789588  
4   0.283152  0.848905  
5   0.366774  0.822101  
6   0.643731  0.716840  
7   0.298985  0.898379  
8   0.478053  0.160870  
9   0.358179  0.900508  
10  0.541236  0.686613  
11  0.309986  0.894494  
12  0.296124  0.856792  
13  0.561548  0.729123  
14  0.392675  0.731027  
15  0.339013  0.847686  
16  0.244617  0.982166  
17  0.475613  0.761378  
18  0.302552  0.942903  
19  0.326946  0.878634  
20  0.300017  0.717833  
21  0.436030  0.403028  
22  0.678253  0.495580  

Overnights 2019 LR=0.001 Model 4¶

In [324]:
# Load and clean data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
lag_cols = [col for col in cool.columns if 'lag' in col]
cool = cool.drop(columns=lag_cols, errors='ignore')
cool = cool[~cool['country'].isin(['Russian Federation'])]
In [325]:
# Convert types
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)
cool['euro_adopted'] = cool['euro_adopted'].astype(int)

# Merge Google Trends data
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Sort and log-transform
cool = cool.sort_values(['country', 'date']).reset_index(drop=True)
cool = cool.drop_duplicates(subset=['country', 'date'], keep='first').reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals']) 
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])
cool['overnights_next_month'] = cool.groupby('country')['overnights'].shift(-1)

# One-hot encode month
month_names = {i: month for i, month in enumerate(['January','February','March','April','May','June','July','August','September','October','November','December'], 1)}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool, ohe_month], axis=1).drop(columns=['month_name'])

# Create lags
lags = {'arrivals': [1, 3, 6, 12], 'overnights': [1, 3, 6, 12], 'cpi': [1], 'unemployment_rate': [3], 'google_trends': [1, 3]}
for var, steps in lags.items():
    for lag in steps:
        cool[f'{var}_lag_{lag}'] = cool.groupby('country')[var].shift(lag)

cool
# Step 1: Define Schengen entry years
schengen_entry_year = {
    'Austria': 1995,
    'Belgium': 1995,
    'Czech Republic': 2007,
    'Denmark': 2001,
    'Finland': 1996,
    'France': 1995,
    'Germany': 1995,
    'Hungary': 2007,
    'Italy': 1997,
    'Netherlands': 1995,
    'Norway': 2001,
    'Poland': 2007,
    'Portugal': 1995,
    'Slovakia': 2007,
    'Slovenia': 2007,
    'Spain': 1995,
    'Sweden': 2001,
    'Switzerland': 2008,
    'Romania': 2024,}

def is_schengen_member(row):
    entry_year = schengen_entry_year.get(row['country'], np.inf)
    return int(row['year'] >= entry_year)

cool['schengen_member'] = cool.apply(is_schengen_member, axis=1).astype('int8')

# Filter and drop missing
cool.isna().sum().sort_values(ascending=False) 
cool = cool[cool['date'] >= '2001-01-01']
cool = cool[cool['date'] <= '2019-12-01']
cool = cool[cool['overnights_next_month'].notna()].copy()
In [326]:
# Label encode country and scale numeric features
cool['country_encoded'] = LabelEncoder().fit_transform(cool['country'])
month_cols = [col for col in cool.columns if col.startswith('month_')]
X_numeric = cool[[
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member','euro_adopted'] + month_cols].values
X_country_array = cool['country_encoded'].astype('int32').values
y = cool['overnights_next_month'].values
In [327]:
# Time-based split
train_end = pd.Timestamp("2014-12-31")
val_end = pd.Timestamp("2017-12-31")
train_mask = cool['date'] <= train_end
val_mask = (cool['date'] > train_end) & (cool['date'] <= val_end)
test_mask = cool['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]
In [328]:
# Define model4
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
n_countries = cool['country_encoded'].nunique()
embedding = Embedding(input_dim=n_countries, output_dim=10)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)
model4 = Model(inputs=[input_numeric, input_country], outputs=output)
model4.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model4.summary()

# Train mode4
early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
history = model4.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16, callbacks=[early_stop]
)
Model: "functional_36"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_36        │ (None, 1, 10)     │        230 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_36          │ (None, 10)        │          0 │ embedding_36[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_36      │ (None, 35)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_36[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_108 (Dense)   │ (None, 64)        │      2,304 │ concatenate_36[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_108[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_72          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_109 (Dense)   │ (None, 32)        │      2,080 │ dropout_72[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_109[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_73          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_110 (Dense)   │ (None, 1)         │         33 │ dropout_73[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 5,031 (19.65 KB)
 Trainable params: 4,839 (18.90 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 12s 8ms/step - loss: 90.7974 - val_loss: 46.7961
Epoch 2/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 33.5081 - val_loss: 3.1799
Epoch 3/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 5.2712 - val_loss: 0.7961
Epoch 4/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 3.8846 - val_loss: 0.5789
Epoch 5/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 3.2203 - val_loss: 0.5077
Epoch 6/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 2.9854 - val_loss: 0.5004
Epoch 7/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 2.8682 - val_loss: 0.4250
Epoch 8/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.5530 - val_loss: 0.4092
Epoch 9/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.3035 - val_loss: 0.4177
Epoch 10/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 2.0789 - val_loss: 0.3989
Epoch 11/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 9ms/step - loss: 1.9518 - val_loss: 0.3952
Epoch 12/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.9176 - val_loss: 0.3784
Epoch 13/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.7907 - val_loss: 0.4061
Epoch 14/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.6883 - val_loss: 0.3758
Epoch 15/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.6661 - val_loss: 0.3739
Epoch 16/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.6102 - val_loss: 0.3784
Epoch 17/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.5431 - val_loss: 0.3768
Epoch 18/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.5284 - val_loss: 0.4029
Epoch 19/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.5092 - val_loss: 0.3586
Epoch 20/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.3832 - val_loss: 0.3747
Epoch 21/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.4667 - val_loss: 0.3209
Epoch 22/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.4723 - val_loss: 0.3727
Epoch 23/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.3939 - val_loss: 0.4069
Epoch 24/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.3451 - val_loss: 0.4176
Epoch 25/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 15ms/step - loss: 1.3533 - val_loss: 0.3674
Epoch 26/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.3464 - val_loss: 0.3010
Epoch 27/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.3440 - val_loss: 0.3922
Epoch 28/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2810 - val_loss: 0.3386
Epoch 29/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2191 - val_loss: 0.3492
Epoch 30/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2700 - val_loss: 0.3981
Epoch 31/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.3477 - val_loss: 0.3905
Epoch 32/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.3152 - val_loss: 0.3948
Epoch 33/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2744 - val_loss: 0.3904
Epoch 34/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2857 - val_loss: 0.3538
Epoch 35/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - loss: 1.2550 - val_loss: 0.3604
Epoch 36/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2832 - val_loss: 0.3851
Epoch 37/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 1.2083 - val_loss: 0.3713
Epoch 38/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2435 - val_loss: 0.3195
Epoch 39/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2570 - val_loss: 0.3591
Epoch 40/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2140 - val_loss: 0.3145
Epoch 41/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - loss: 1.3116 - val_loss: 0.3504
Epoch 42/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.2215 - val_loss: 0.3300
Epoch 43/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2311 - val_loss: 0.2900
Epoch 44/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.2804 - val_loss: 0.3123
Epoch 45/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.2416 - val_loss: 0.2690
Epoch 46/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.2357 - val_loss: 0.3585
Epoch 47/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.2360 - val_loss: 0.3480
Epoch 48/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 10ms/step - loss: 1.1846 - val_loss: 0.2819
Epoch 49/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.2401 - val_loss: 0.3116
Epoch 50/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - loss: 1.2199 - val_loss: 0.3275
Epoch 51/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1878 - val_loss: 0.3304
Epoch 52/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2340 - val_loss: 0.2855
Epoch 53/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 1.2356 - val_loss: 0.3079
Epoch 54/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1982 - val_loss: 0.3781
Epoch 55/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1904 - val_loss: 0.3046
Epoch 56/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1438 - val_loss: 0.3088
Epoch 57/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1583 - val_loss: 0.3761
Epoch 58/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1836 - val_loss: 0.3130
Epoch 59/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1245 - val_loss: 0.3135
Epoch 60/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1463 - val_loss: 0.3537
Epoch 61/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1222 - val_loss: 0.3582
Epoch 62/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1204 - val_loss: 0.3066
Epoch 63/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0663 - val_loss: 0.2981
Epoch 64/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1196 - val_loss: 0.3340
Epoch 65/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0988 - val_loss: 0.2898
Epoch 66/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1041 - val_loss: 0.3006
Epoch 67/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0544 - val_loss: 0.2970
Epoch 68/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1703 - val_loss: 0.2498
Epoch 69/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0619 - val_loss: 0.2769
Epoch 70/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1206 - val_loss: 0.2586
Epoch 71/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1017 - val_loss: 0.2959
Epoch 72/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1011 - val_loss: 0.2986
Epoch 73/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1399 - val_loss: 0.2408
Epoch 74/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0888 - val_loss: 0.3087
Epoch 75/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1052 - val_loss: 0.2366
Epoch 76/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0449 - val_loss: 0.3115
Epoch 77/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1046 - val_loss: 0.2994
Epoch 78/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0914 - val_loss: 0.3066
Epoch 79/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0305 - val_loss: 0.2944
Epoch 80/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0550 - val_loss: 0.2969
Epoch 81/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0954 - val_loss: 0.3187
Epoch 82/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1236 - val_loss: 0.2890
Epoch 83/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0461 - val_loss: 0.2956
Epoch 84/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0617 - val_loss: 0.2678
Epoch 85/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0369 - val_loss: 0.2509
Epoch 86/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0528 - val_loss: 0.3236
Epoch 87/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0568 - val_loss: 0.2442
Epoch 88/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0549 - val_loss: 0.2777
Epoch 89/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0523 - val_loss: 0.2935
Epoch 90/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0278 - val_loss: 0.2798
Epoch 91/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.9610 - val_loss: 0.3066
Epoch 92/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0297 - val_loss: 0.2243
Epoch 93/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0141 - val_loss: 0.2905
Epoch 94/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0028 - val_loss: 0.3072
Epoch 95/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0506 - val_loss: 0.2882
Epoch 96/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0073 - val_loss: 0.2186
Epoch 97/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0086 - val_loss: 0.3141
Epoch 98/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.9936 - val_loss: 0.2790
Epoch 99/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.9645 - val_loss: 0.3335
Epoch 100/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.0158 - val_loss: 0.2461
In [329]:
# Baseline predictions
baseline_preds = model4.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model4.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(max(0, permuted_mse - baseline_mse))

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model4.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = max(0, mean_squared_error(y_test, month_preds) - baseline_mse)
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 3: Overnights (2019)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 18ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step

Model Evaluation¶

In [330]:
## Model evalauation
# Predict and flatten
train_preds = model4.predict([X_num_train, X_cat_train]).flatten()
test_preds = model4.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse4 = np.sqrt(train_mse)
test_rmse4 = np.sqrt(test_mse)

#MAE
train_mae4 = mean_absolute_error(y_train, train_preds)
test_mae4 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape4 = mean_absolute_percentage_error(y_train, train_preds)
test_mape4 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_4 = r2_score(y_train, train_preds)
test_r2_4 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse4:.4f}, MAE: {train_mae4:.4f}, MAPE: {train_mape4:.4f}, R²: {train_r2_4:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse4:.4f}, MAE: {test_mae4:.4f}, MAPE: {test_mape4:.4f}, R²: {test_r2_4:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
121/121 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
Train MSE: 0.1149, RMSE: 0.3390, MAE: 0.2549, MAPE: 0.0277, R²: 0.9768
Test MSE: 0.3801, RMSE: 0.6165, MAE: 0.4971, MAPE: 0.0493, R²: 0.9077
In [331]:
# Global trend plot (total actual vs predicted per month)
y_pred_all = model4.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Overnights (2019)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
164/164 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step
In [332]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Overnights per Country (2019)', fontsize=16, y=1.02)
plt.show()
In [333]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                   country           MSE          RMSE           MAE  \
0                  Austria  9.030588e+10  3.005094e+05  1.864868e+05   
1                  Belgium  2.222734e+08  1.490884e+04  9.771725e+03   
2   Bosnia and Herzegovina  1.791651e+09  4.232790e+04  3.016650e+04   
3                   Canada  4.502302e+08  2.121863e+04  1.406842e+04   
4           Czech Republic  9.575052e+10  3.094358e+05  1.572649e+05   
5                  Denmark  1.668924e+09  4.085246e+04  2.036179e+04   
6                  Finland  8.806166e+08  2.967519e+04  2.118418e+04   
7                   France  5.216737e+09  7.222698e+04  3.979571e+04   
8                  Germany  3.592724e+12  1.895448e+06  1.056744e+06   
9                  Hungary  1.180876e+10  1.086681e+05  5.517401e+04   
10                 Ireland  5.457368e+08  2.336101e+04  1.457357e+04   
11                   Italy  1.764677e+11  4.200806e+05  1.610925e+05   
12             Netherlands  4.946827e+09  7.033368e+04  3.836726e+04   
13                  Norway  1.175707e+09  3.428858e+04  1.777885e+04   
14                  Poland  1.563322e+11  3.953887e+05  1.973553e+05   
15                 Romania  8.443926e+08  2.905843e+04  1.467024e+04   
16                Slovakia  5.826269e+09  7.633000e+04  3.326159e+04   
17                Slovenia  2.389104e+11  4.887847e+05  2.336568e+05   
18                   Spain  7.090350e+08  2.662771e+04  1.438300e+04   
19                  Sweden  1.700015e+09  4.123123e+04  2.305828e+04   
20             Switzerland  2.423808e+09  4.923219e+04  2.420284e+04   
21                     USA  7.054939e+09  8.399368e+04  6.491786e+04   
22          United Kingdom  5.882305e+10  2.425346e+05  1.640790e+05   

        MAPE        R2  
0   0.341512  0.793687  
1   0.349829  0.982941  
2   0.343102  0.951547  
3   0.393229  0.633062  
4   0.321398  0.778906  
5   0.609540  0.932945  
6   0.547492  0.515157  
7   0.237299  0.906078  
8   0.382247  0.148015  
9   0.358141  0.928578  
10  0.386605  0.666496  
11  0.270358  0.680285  
12  0.305097  0.965537  
13  0.326562  0.937865  
14  0.372516  0.721547  
15  0.331823  0.700616  
16  0.295391  0.967036  
17  0.409152  0.673543  
18  0.355399  0.829893  
19  0.396644  0.938800  
20  0.269205  0.814150  
21  0.510810  0.336241  
22  0.397067  0.557544  

Arrivals 2024 LR = 0.001 Model 2¶

In [ ]:
# Load and clean data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
lag_cols = [col for col in cool.columns if 'lag' in col]
cool = cool.drop(columns=lag_cols, errors='ignore')
cool = cool[~cool['country'].isin(['Russian Federation'])]
In [ ]:
# Convert types
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)
cool['euro_adopted'] = cool['euro_adopted'].astype(int)

# Merge Google Trends data
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Sort and log-transform
cool = cool.sort_values(['country', 'date']).reset_index(drop=True)
cool = cool.drop_duplicates(subset=['country', 'date'], keep='first').reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals']) 
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])
cool['arrivals_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

# One-hot encode month
month_names = {i: month for i, month in enumerate(['January','February','March','April','May','June','July','August','September','October','November','December'], 1)}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool, ohe_month], axis=1).drop(columns=['month_name'])

# Create lags
lags = {'arrivals': [1, 3, 6, 12], 'overnights': [1, 3, 6, 12], 'cpi': [1], 'unemployment_rate': [3], 'google_trends': [1, 3]}
for var, steps in lags.items():
    for lag in steps:
        cool[f'{var}_lag_{lag}'] = cool.groupby('country')[var].shift(lag)

# Step 1: Define Schengen entry years
schengen_entry_year = {
    'Austria': 1995,
    'Belgium': 1995,
    'Czech Republic': 2007,
    'Denmark': 2001,
    'Finland': 1996,
    'France': 1995,
    'Germany': 1995,
    'Hungary': 2007,
    'Italy': 1997,
    'Netherlands': 1995,
    'Norway': 2001,
    'Poland': 2007,
    'Portugal': 1995,
    'Slovakia': 2007,
    'Slovenia': 2007,
    'Spain': 1995,
    'Sweden': 2001,
    'Switzerland': 2008,
    'Romania': 2024,}

def is_schengen_member(row):
    entry_year = schengen_entry_year.get(row['country'], np.inf)
    return int(row['year'] >= entry_year)

cool['schengen_member'] = cool.apply(is_schengen_member, axis=1).astype('int8')

# Filter and drop missing
cool.isna().sum().sort_values(ascending=False) 
cool = cool[cool['date'] >= '2001-01-01']
cool = cool[cool['arrivals_next_month'].notna()].copy()
In [ ]:
# Label encode country and scale numeric features
cool['country_encoded'] = LabelEncoder().fit_transform(cool['country'])
month_cols = [col for col in cool.columns if col.startswith('month_')]
X_numeric = cool[[
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member','euro_adopted'] + month_cols].values
X_country_array = cool['country_encoded'].astype('int32').values
y = cool['arrivals_next_month'].values
In [ ]:
cool
country arrivals overnights date year month month_sin month_cos eu_member euro_adopted ... overnights_lag_1 overnights_lag_3 overnights_lag_6 overnights_lag_12 cpi_lag_1 unemployment_rate_lag_3 google_trends_lag_1 google_trends_lag_3 schengen_member country_encoded
12 Austria 7.873217 9.102867 2001-01-01 2001 1 0.500000 8.660254e-01 1 1 ... 9.624303 11.286840 13.687496 9.077951 4.439116 3.6 -1.0 -1.0 1 0
13 Austria 8.664923 9.856815 2001-02-01 2001 2 0.866025 5.000000e-01 1 1 ... 9.102867 9.441055 13.771151 9.606159 4.440296 3.5 -1.0 -1.0 1 0
14 Austria 9.258368 10.391976 2001-03-01 2001 3 1.000000 6.120000e-17 1 1 ... 9.856815 9.624303 13.051318 10.189080 4.443827 3.8 -1.0 -1.0 1 0
15 Austria 10.645711 11.952940 2001-04-01 2001 4 0.866025 -5.000000e-01 1 1 ... 10.391976 9.102867 11.286840 11.828816 4.445001 4.2 -1.0 -1.0 1 0
16 Austria 11.169632 12.518896 2001-05-01 2001 5 0.500000 -8.660254e-01 1 1 ... 11.952940 9.856815 9.441055 12.243176 4.450853 4.5 -1.0 -1.0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6892 United Kingdom 11.936578 13.523750 2024-07-01 2024 7 -0.500000 -8.660254e-01 0 0 ... 13.349341 11.997115 8.512783 13.465556 5.016617 4.3 47.0 35.0 0 22
6893 United Kingdom 12.015293 13.678767 2024-08-01 2024 8 -0.866025 -5.000000e-01 0 0 ... 13.523750 12.908799 9.019059 13.632791 5.014627 4.1 47.0 32.0 0 22
6894 United Kingdom 11.742989 13.317415 2024-09-01 2024 9 -1.000000 -1.840000e-16 0 0 ... 13.678767 13.349341 9.886138 13.254668 5.018603 4.2 41.0 47.0 0 22
6895 United Kingdom 10.992100 12.530613 2024-10-01 2024 10 -0.866025 5.000000e-01 0 0 ... 13.317415 13.523750 11.997115 12.325113 5.017942 4.2 30.0 47.0 0 22
6896 United Kingdom 8.841593 9.982068 2024-11-01 2024 11 -0.500000 8.660254e-01 0 0 ... 12.530613 13.678767 12.908799 9.631482 5.023881 4.4 19.0 41.0 0 22

6599 rows × 42 columns

In [ ]:
cool.isna().sum()
country                      0
arrivals                     0
overnights                   0
date                         0
year                         0
month                        0
month_sin                    0
month_cos                    0
eu_member                    0
euro_adopted                 0
unemployment_rate            0
exchange_rate                0
industry_production        363
cpi                          0
google_trends                0
arrivals_next_month          0
month_April                  0
month_August                 0
month_December               0
month_February               0
month_January                0
month_July                   0
month_June                   0
month_March                  0
month_May                    0
month_November               0
month_October                0
month_September              0
arrivals_lag_1               0
arrivals_lag_3               0
arrivals_lag_6               0
arrivals_lag_12              0
overnights_lag_1             0
overnights_lag_3             0
overnights_lag_6             0
overnights_lag_12            0
cpi_lag_1                    0
unemployment_rate_lag_3      0
google_trends_lag_1          0
google_trends_lag_3          0
schengen_member              0
country_encoded              0
dtype: int64
In [ ]:
# Time-based split
train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool['date'] <= train_end
val_mask = (cool['date'] > train_end) & (cool['date'] <= val_end)
test_mask = cool['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)  

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]

# Define model2
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
n_countries = cool['country_encoded'].nunique()
embedding = Embedding(input_dim=n_countries, output_dim=10)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)
model2 = Model(inputs=[input_numeric, input_country], outputs=output)
model2.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model2.summary()

# Train model2
early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
history = model2.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16, callbacks=[early_stop]
)
Model: "functional_37"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_37        │ (None, 1, 10)     │        230 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_37          │ (None, 10)        │          0 │ embedding_37[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_37      │ (None, 35)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_37[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_111 (Dense)   │ (None, 64)        │      2,304 │ concatenate_37[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_111[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_74          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_112 (Dense)   │ (None, 32)        │      2,080 │ dropout_74[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_112[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_75          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_113 (Dense)   │ (None, 1)         │         33 │ dropout_75[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 5,031 (19.65 KB)
 Trainable params: 4,839 (18.90 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 14s 14ms/step - loss: 67.6397 - val_loss: 24.2574
Epoch 2/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 16.7866 - val_loss: 2.7601
Epoch 3/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 3.6225 - val_loss: 2.3808
Epoch 4/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 13ms/step - loss: 2.9429 - val_loss: 2.4379
Epoch 5/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 10ms/step - loss: 2.4179 - val_loss: 2.2861
Epoch 6/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 12ms/step - loss: 2.2089 - val_loss: 2.4861
Epoch 7/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.8824 - val_loss: 2.4279
Epoch 8/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.7382 - val_loss: 2.4234
Epoch 9/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 7s 24ms/step - loss: 1.6110 - val_loss: 2.4041
Epoch 10/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 6s 7ms/step - loss: 1.3417 - val_loss: 2.2658
Epoch 11/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 1.2643 - val_loss: 2.2970
Epoch 12/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 11ms/step - loss: 1.2401 - val_loss: 2.3647
Epoch 13/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 8s 18ms/step - loss: 1.1678 - val_loss: 2.3226
Epoch 14/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 1.1266 - val_loss: 2.3435
Epoch 15/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1508 - val_loss: 2.2937
Epoch 16/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.1442 - val_loss: 2.3006
Epoch 17/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0613 - val_loss: 2.3334
Epoch 18/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0120 - val_loss: 2.3491
Epoch 19/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0003 - val_loss: 2.3213
Epoch 20/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9837 - val_loss: 2.3538
Epoch 21/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.9836 - val_loss: 2.3499
Epoch 22/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9727 - val_loss: 2.3663
Epoch 23/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9948 - val_loss: 2.3869
Epoch 24/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9877 - val_loss: 2.3571
Epoch 25/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9401 - val_loss: 2.3121
Epoch 26/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9512 - val_loss: 2.4112
Epoch 27/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9802 - val_loss: 2.3515
Epoch 28/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9385 - val_loss: 2.4328
Epoch 29/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9237 - val_loss: 2.3702
Epoch 30/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9208 - val_loss: 2.4257
Epoch 31/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9504 - val_loss: 2.4614
Epoch 32/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.9075 - val_loss: 2.3074
Epoch 33/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8661 - val_loss: 2.2357
Epoch 34/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8901 - val_loss: 2.3487
Epoch 35/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 17ms/step - loss: 0.8557 - val_loss: 2.4129
Epoch 36/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 4s 14ms/step - loss: 0.8772 - val_loss: 2.3710
Epoch 37/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8398 - val_loss: 2.4213
Epoch 38/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8535 - val_loss: 2.3572
Epoch 39/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8696 - val_loss: 2.3496
Epoch 40/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.8352 - val_loss: 2.2743
Epoch 41/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.8381 - val_loss: 2.4121
Epoch 42/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8806 - val_loss: 2.2995
Epoch 43/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8205 - val_loss: 2.3153
Epoch 44/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8615 - val_loss: 2.3236
Epoch 45/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8250 - val_loss: 2.2614
Epoch 46/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8223 - val_loss: 2.3344
Epoch 47/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8433 - val_loss: 2.3264
Epoch 48/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8229 - val_loss: 2.2402
Epoch 49/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7742 - val_loss: 2.3089
Epoch 50/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7955 - val_loss: 2.2734
Epoch 51/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7981 - val_loss: 2.2787
Epoch 52/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7627 - val_loss: 2.2760
Epoch 53/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 9ms/step - loss: 0.8173 - val_loss: 2.3125
Epoch 54/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.8277 - val_loss: 2.2606
Epoch 55/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.7665 - val_loss: 2.3070
Epoch 56/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7709 - val_loss: 2.2386
Epoch 57/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - loss: 0.7876 - val_loss: 2.3755
Epoch 58/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7985 - val_loss: 2.2257
Epoch 59/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7697 - val_loss: 2.3401
Epoch 60/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7739 - val_loss: 2.2278
Epoch 61/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7652 - val_loss: 2.3139
Epoch 62/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7971 - val_loss: 2.2271
Epoch 63/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.7472 - val_loss: 2.2477
Epoch 64/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.7693 - val_loss: 2.2629
Epoch 65/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 10ms/step - loss: 0.7221 - val_loss: 2.2515
Epoch 66/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 5s 7ms/step - loss: 0.7554 - val_loss: 2.2968
Epoch 67/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.7474 - val_loss: 2.2501
Epoch 68/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7336 - val_loss: 2.1876
Epoch 69/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6973 - val_loss: 2.2026
Epoch 70/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6985 - val_loss: 2.2146
Epoch 71/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 5ms/step - loss: 0.7471 - val_loss: 2.2008
Epoch 72/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7348 - val_loss: 2.2450
Epoch 73/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.7008 - val_loss: 2.1541
Epoch 74/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.7303 - val_loss: 2.1802
Epoch 75/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.7044 - val_loss: 2.2246
Epoch 76/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.7006 - val_loss: 2.2524
Epoch 77/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7326 - val_loss: 2.2514
Epoch 78/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.6666 - val_loss: 2.2973
Epoch 79/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.6815 - val_loss: 2.1639
Epoch 80/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6999 - val_loss: 2.2235
Epoch 81/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.6720 - val_loss: 2.2327
Epoch 82/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7127 - val_loss: 2.2349
Epoch 83/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6997 - val_loss: 2.2009
Epoch 84/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6761 - val_loss: 2.1925
Epoch 85/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6760 - val_loss: 2.2075
Epoch 86/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6772 - val_loss: 2.2640
Epoch 87/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6492 - val_loss: 2.2148
Epoch 88/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.6594 - val_loss: 2.2313
Epoch 89/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 0.6871 - val_loss: 2.1818
Epoch 90/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6710 - val_loss: 2.1486
Epoch 91/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6750 - val_loss: 2.2075
Epoch 92/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.6639 - val_loss: 2.1997
Epoch 93/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6333 - val_loss: 2.1981
Epoch 94/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 8ms/step - loss: 0.6760 - val_loss: 2.1814
Epoch 95/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6528 - val_loss: 2.2041
Epoch 96/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - loss: 0.6243 - val_loss: 2.2046
Epoch 97/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6439 - val_loss: 2.2165
Epoch 98/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.6469 - val_loss: 2.1735
Epoch 99/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6199 - val_loss: 2.1718
Epoch 100/100
276/276 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.6182 - val_loss: 2.2016
In [ ]:
# Baseline predictions
baseline_preds = model2.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model2.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(permuted_mse - baseline_mse)

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model2.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = mean_squared_error(y_test, month_preds) - baseline_mse
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 2: Arrivals (2024)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step

Model Evaluation¶

In [ ]:
## Model evalauation
# Predict and flatten
train_preds = model2.predict([X_num_train, X_cat_train]).flatten()
test_preds = model2.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse2 = np.sqrt(train_mse)
test_rmse2 = np.sqrt(test_mse)

#MAE
train_mae2 = mean_absolute_error(y_train, train_preds)
test_mae2 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape2 = mean_absolute_percentage_error(y_train, train_preds)
test_mape2 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_2 = r2_score(y_train, train_preds)
test_r2_2 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse2:.4f}, MAE: {train_mae2:.4f}, MAPE: {train_mape2:.4f}, R²: {train_r2_2:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse2:.4f}, MAE: {test_mae2:.4f}, MAPE: {test_mape2:.4f}, R²: {test_r2_2:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
138/138 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step
34/34 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step
Train MSE: 0.0749, RMSE: 0.2738, MAE: 0.2059, MAPE: 0.0257, R²: 0.9807
Test MSE: 0.4215, RMSE: 0.6492, MAE: 0.4911, MAPE: 0.0579, R²: 0.8919
In [ ]:
# Global trend plot (total actual vs predicted per month)
y_pred_all = model2.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Arrivals (2024)")
plt.xlabel("Date")
plt.ylabel("Total Arrivals")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
207/207 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step
In [ ]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals per Country (2024)', fontsize=16, y=1.02)
plt.show()
In [ ]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                   country           MSE           RMSE            MAE  \
0                  Austria  5.081636e+09   71285.595256   49918.328535   
1                  Belgium  6.537096e+07    8085.231123    4196.487165   
2   Bosnia and Herzegovina  1.642307e+08   12815.252438    8874.936591   
3                   Canada  1.947356e+07    4412.885448    2575.362746   
4           Czech Republic  1.393131e+09   37324.668759   19292.966149   
5                  Denmark  1.315828e+08   11470.952964    4718.637211   
6                  Finland  1.638380e+07    4047.691017    2332.584180   
7                   France  4.337233e+08   20826.025097   12390.277720   
8                  Germany  7.127464e+10  266973.101306  161100.764622   
9                  Hungary  1.225432e+09   35006.166906   18925.231006   
10                 Ireland  3.426333e+07    5853.489093    3640.333267   
11                   Italy  1.647708e+09   40591.974631   20378.707260   
12             Netherlands  6.173665e+08   24846.861121   12934.523989   
13                  Norway  6.668156e+07    8165.878025    3941.330346   
14                  Poland  3.940971e+09   62777.156710   35559.869517   
15                 Romania  3.460065e+07    5882.232142    3181.175834   
16                Slovakia  1.441173e+08   12004.884555    6607.882405   
17                Slovenia  2.817913e+09   53084.015108   37657.455940   
18                   Spain  5.689977e+07    7543.193539    4248.567497   
19                  Sweden  5.434318e+07    7371.782476    3788.066846   
20             Switzerland  2.199884e+08   14832.006118    8522.123304   
21                     USA  3.815869e+08   19534.250033   13980.165553   
22          United Kingdom  8.179276e+08   28599.433484   17370.251709   

        MAPE        R2  
0   0.574757  0.617748  
1   0.581596  0.851310  
2   0.408353  0.854232  
3   0.614233  0.829186  
4   0.311453  0.848878  
5   0.522899  0.700217  
6   0.762202  0.516292  
7   0.414942  0.834006  
8   0.527543  0.221635  
9   0.459027  0.787361  
10  0.653992  0.523599  
11  0.382417  0.811186  
12  0.397557  0.803672  
13  0.792227  0.571320  
14  0.477038  0.710003  
15  0.390700  0.815610  
16  0.313489  0.961850  
17  0.549796  0.772319  
18  0.401895  0.862284  
19  0.472303  0.882823  
20  0.345093  0.620763  
21  0.455143  0.801948  
22  0.743835  0.738716  

Arrivals 2019 LR=0.001 Model 1¶

In [266]:
# Load and clean data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
lag_cols = [col for col in cool.columns if 'lag' in col]
cool = cool.drop(columns=lag_cols, errors='ignore')
cool = cool[~cool['country'].isin(['Russian Federation'])]
In [267]:
# Convert types
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)
cool['euro_adopted'] = cool['euro_adopted'].astype(int)

# Merge Google Trends data
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Sort and log-transform
cool = cool.sort_values(['country', 'date']).reset_index(drop=True)
cool = cool.drop_duplicates(subset=['country', 'date'], keep='first').reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals']) 
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])
cool['arrivals_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

# One-hot encode month
month_names = {i: month for i, month in enumerate(['January','February','March','April','May','June','July','August','September','October','November','December'], 1)}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool, ohe_month], axis=1).drop(columns=['month_name'])

# Create lags
lags = {'arrivals': [1, 3, 6, 12], 'overnights': [1, 3, 6, 12], 'cpi': [1], 'unemployment_rate': [3], 'google_trends': [1, 3]}
for var, steps in lags.items():
    for lag in steps:
        cool[f'{var}_lag_{lag}'] = cool.groupby('country')[var].shift(lag)

# Step 1: Define Schengen entry years
schengen_entry_year = {
    'Austria': 1995,
    'Belgium': 1995,
    'Czech Republic': 2007,
    'Denmark': 2001,
    'Finland': 1996,
    'France': 1995,
    'Germany': 1995,
    'Hungary': 2007,
    'Italy': 1997,
    'Netherlands': 1995,
    'Norway': 2001,
    'Poland': 2007,
    'Portugal': 1995,
    'Slovakia': 2007,
    'Slovenia': 2007,
    'Spain': 1995,
    'Sweden': 2001,
    'Switzerland': 2008,
    'Romania': 2024,}

def is_schengen_member(row):
    entry_year = schengen_entry_year.get(row['country'], np.inf)
    return int(row['year'] >= entry_year)

cool['schengen_member'] = cool.apply(is_schengen_member, axis=1).astype('int8')

# Filter and drop missing
cool.isna().sum().sort_values(ascending=False) 
cool = cool[cool['date'] >= '2001-01-01']
cool = cool[cool['date'] <= '2019-12-01']

cool = cool[cool['arrivals_next_month'].notna()].copy()
In [268]:
# Label encode country and scale numeric features
cool['country_encoded'] = LabelEncoder().fit_transform(cool['country'])
month_cols = [col for col in cool.columns if col.startswith('month_')]
X_numeric = cool[[
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member','euro_adopted'] + month_cols].values
X_country_array = cool['country_encoded'].astype('int32').values
y = cool['arrivals_next_month'].values
In [269]:
# Time-based split
train_end = pd.Timestamp("2014-12-31")
val_end = pd.Timestamp("2017-12-31")
train_mask = cool['date'] <= train_end
val_mask = (cool['date'] > train_end) & (cool['date'] <= val_end)
test_mask = cool['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask])
X_num_val = scaler.transform(X_numeric[val_mask])
X_num_test = scaler.transform(X_numeric[test_mask])
X_numeric_scaled_all = scaler.transform(X_numeric)  

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]
In [270]:
# Define model4
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
n_countries = cool['country_encoded'].nunique()
embedding = Embedding(input_dim=n_countries, output_dim=10)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)
model1 = Model(inputs=[input_numeric, input_country], outputs=output)
model1.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
model1.summary()

# Train mode4
early_stop = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
history = model1.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16, callbacks=[early_stop]
)
Model: "functional_30"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_30        │ (None, 1, 10)     │        230 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_30          │ (None, 10)        │          0 │ embedding_30[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_30      │ (None, 35)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_30[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_90 (Dense)    │ (None, 64)        │      2,304 │ concatenate_30[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_90[0][0]    │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_60          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_91 (Dense)    │ (None, 32)        │      2,080 │ dropout_60[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_91[0][0]    │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_61          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_92 (Dense)    │ (None, 1)         │         33 │ dropout_61[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 5,031 (19.65 KB)
 Trainable params: 4,839 (18.90 KB)
 Non-trainable params: 192 (768.00 B)
Epoch 1/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 9s 8ms/step - loss: 67.7775 - val_loss: 31.6445
Epoch 2/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 22.3988 - val_loss: 1.5322
Epoch 3/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 3.8682 - val_loss: 0.7337
Epoch 4/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 3.3002 - val_loss: 0.5032
Epoch 5/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 2.6980 - val_loss: 0.4413
Epoch 6/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 2.3971 - val_loss: 0.3308
Epoch 7/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 2.0858 - val_loss: 0.3201
Epoch 8/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.9076 - val_loss: 0.3460
Epoch 9/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 9ms/step - loss: 1.8606 - val_loss: 0.3169
Epoch 10/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.6263 - val_loss: 0.3324
Epoch 11/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.5082 - val_loss: 0.2692
Epoch 12/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.3668 - val_loss: 0.2613
Epoch 13/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.4066 - val_loss: 0.3108
Epoch 14/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.2721 - val_loss: 0.2947
Epoch 15/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.3073 - val_loss: 0.2788
Epoch 16/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 1.1615 - val_loss: 0.2917
Epoch 17/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.2453 - val_loss: 0.2769
Epoch 18/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.1692 - val_loss: 0.2724
Epoch 19/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - loss: 1.1334 - val_loss: 0.3020
Epoch 20/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.1748 - val_loss: 0.2422
Epoch 21/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 1.1109 - val_loss: 0.2975
Epoch 22/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 1.0664 - val_loss: 0.3064
Epoch 23/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 1.1109 - val_loss: 0.2958
Epoch 24/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0124 - val_loss: 0.2453
Epoch 25/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0531 - val_loss: 0.2697
Epoch 26/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0846 - val_loss: 0.2782
Epoch 27/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0858 - val_loss: 0.2751
Epoch 28/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 1.0140 - val_loss: 0.2530
Epoch 29/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0381 - val_loss: 0.2716
Epoch 30/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9978 - val_loss: 0.3025
Epoch 31/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 1.0015 - val_loss: 0.2562
Epoch 32/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9500 - val_loss: 0.2932
Epoch 33/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.9884 - val_loss: 0.2509
Epoch 34/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9633 - val_loss: 0.2902
Epoch 35/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9817 - val_loss: 0.3144
Epoch 36/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9323 - val_loss: 0.2615
Epoch 37/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9170 - val_loss: 0.2546
Epoch 38/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9498 - val_loss: 0.2824
Epoch 39/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9296 - val_loss: 0.2132
Epoch 40/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9895 - val_loss: 0.2419
Epoch 41/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 10ms/step - loss: 0.9238 - val_loss: 0.3262
Epoch 42/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9853 - val_loss: 0.2984
Epoch 43/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.9708 - val_loss: 0.2598
Epoch 44/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - loss: 0.9413 - val_loss: 0.2332
Epoch 45/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9508 - val_loss: 0.2466
Epoch 46/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8969 - val_loss: 0.2490
Epoch 47/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9293 - val_loss: 0.2760
Epoch 48/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - loss: 0.9200 - val_loss: 0.2497
Epoch 49/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9380 - val_loss: 0.2382
Epoch 50/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9109 - val_loss: 0.2762
Epoch 51/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.9188 - val_loss: 0.2745
Epoch 52/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.9029 - val_loss: 0.2906
Epoch 53/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 0.9281 - val_loss: 0.2222
Epoch 54/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 9ms/step - loss: 0.8881 - val_loss: 0.2893
Epoch 55/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8973 - val_loss: 0.2331
Epoch 56/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8678 - val_loss: 0.2558
Epoch 57/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - loss: 0.8569 - val_loss: 0.2376
Epoch 58/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8497 - val_loss: 0.2230
Epoch 59/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8584 - val_loss: 0.2637
Epoch 60/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8452 - val_loss: 0.2468
Epoch 61/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8408 - val_loss: 0.2767
Epoch 62/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8325 - val_loss: 0.2766
Epoch 63/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.8907 - val_loss: 0.3192
Epoch 64/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 10ms/step - loss: 0.8712 - val_loss: 0.1979
Epoch 65/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.8256 - val_loss: 0.2884
Epoch 66/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.8836 - val_loss: 0.2676
Epoch 67/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8484 - val_loss: 0.2426
Epoch 68/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 9ms/step - loss: 0.8475 - val_loss: 0.2432
Epoch 69/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8207 - val_loss: 0.2640
Epoch 70/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8451 - val_loss: 0.2363
Epoch 71/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.7694 - val_loss: 0.2579
Epoch 72/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8216 - val_loss: 0.3550
Epoch 73/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8074 - val_loss: 0.2073
Epoch 74/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8457 - val_loss: 0.2140
Epoch 75/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7992 - val_loss: 0.2816
Epoch 76/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.8139 - val_loss: 0.2271
Epoch 77/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.7944 - val_loss: 0.2203
Epoch 78/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8483 - val_loss: 0.2180
Epoch 79/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 9ms/step - loss: 0.8018 - val_loss: 0.2443
Epoch 80/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8288 - val_loss: 0.2461
Epoch 81/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8137 - val_loss: 0.2634
Epoch 82/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7978 - val_loss: 0.2523
Epoch 83/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 12ms/step - loss: 0.7900 - val_loss: 0.2177
Epoch 84/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - loss: 0.7369 - val_loss: 0.2790
Epoch 85/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.8106 - val_loss: 0.2041
Epoch 86/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7447 - val_loss: 0.2484
Epoch 87/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 3s 8ms/step - loss: 0.7948 - val_loss: 0.2330
Epoch 88/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7684 - val_loss: 0.2200
Epoch 89/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7770 - val_loss: 0.2393
Epoch 90/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.8086 - val_loss: 0.1829
Epoch 91/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7575 - val_loss: 0.1823
Epoch 92/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7621 - val_loss: 0.1903
Epoch 93/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 6ms/step - loss: 0.7652 - val_loss: 0.2086
Epoch 94/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7786 - val_loss: 0.2223
Epoch 95/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7741 - val_loss: 0.2094
Epoch 96/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 2s 7ms/step - loss: 0.7545 - val_loss: 0.1820
Epoch 97/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.7618 - val_loss: 0.2086
Epoch 98/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7097 - val_loss: 0.1998
Epoch 99/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - loss: 0.7333 - val_loss: 0.2348
Epoch 100/100
242/242 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 0.7541 - val_loss: 0.2034
In [271]:
# Baseline predictions
baseline_preds = model1.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'arrivals_lag_1', 'arrivals_lag_3', 'arrivals_lag_6', 'arrivals_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model1.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(max(0, permuted_mse - baseline_mse))

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model1.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = max(0, mean_squared_error(y_test, month_preds) - baseline_mse)

else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 1: Arrivals (2019)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
18/18 ━━━━━━━━━━━━━━━━━━━━ 2s 58ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 1s 19ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step 

Model Evaluation¶

In [272]:
## Model evalauation
# Predict and flatten
train_preds = model1.predict([X_num_train, X_cat_train]).flatten()
test_preds = model1.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse1 = np.sqrt(train_mse)
test_rmse1 = np.sqrt(test_mse)

#MAE
train_mae1 = mean_absolute_error(y_train, train_preds)
test_mae1 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape1 = mean_absolute_percentage_error(y_train, train_preds)
test_mape1 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_1 = r2_score(y_train, train_preds)
test_r2_1 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse1:.4f}, MAE: {train_mae1:.4f}, MAPE: {train_mape1:.4f}, R²: {train_r2_1:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse1:.4f}, MAE: {test_mae1:.4f}, MAPE: {test_mape1:.4f}, R²: {test_r2_1:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
121/121 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step
18/18 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
Train MSE: 0.0857, RMSE: 0.2927, MAE: 0.2219, MAPE: 0.0277, R²: 0.9779
Test MSE: 0.4103, RMSE: 0.6405, MAE: 0.4875, MAPE: 0.0546, R²: 0.8716
In [273]:
# Global trend plot (total actual vs predicted per month)
y_pred_all = model1.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Arrivals (2019)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
164/164 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step
In [274]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals per Country (2019)', fontsize=16, y=1.02)
plt.show()
In [275]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                   country           MSE           RMSE            MAE  \
0                  Austria  2.679614e+09   51764.985675   37030.900614   
1                  Belgium  2.492627e+07    4992.621074    3350.236781   
2   Bosnia and Herzegovina  1.842222e+08   13572.847560   10908.470133   
3                   Canada  7.779840e+07    8820.339876    5862.227914   
4           Czech Republic  5.728626e+08   23934.548834   13299.749715   
5                  Denmark  9.971241e+07    9985.609978    4399.104020   
6                  Finland  4.199998e+07    6480.738915    5031.262491   
7                   France  1.101853e+08   10496.917985    7294.615351   
8                  Germany  4.560198e+10  213546.207054  127152.686279   
9                  Hungary  6.498556e+08   25492.265914   13569.599833   
10                 Ireland  3.477323e+07    5896.883205    3741.162018   
11                   Italy  7.621585e+09   87301.691953   31303.574341   
12             Netherlands  1.198660e+08   10948.333395    7456.408488   
13                  Norway  3.310414e+07    5753.619902    3318.054125   
14                  Poland  2.317188e+09   48137.174735   28348.363419   
15                 Romania  2.196468e+07    4686.648987    2272.309680   
16                Slovakia  2.515013e+07    5014.990083    2605.888275   
17                Slovenia  3.576730e+09   59805.772720   41043.037821   
18                   Spain  2.336578e+07    4833.815971    3305.533460   
19                  Sweden  4.620571e+07    6797.478266    4890.971967   
20             Switzerland  7.623722e+07    8731.392972    6250.464722   
21                     USA  8.693620e+08   29484.944652   23078.987376   
22          United Kingdom  1.602999e+09   40037.469582   28814.113729   

        MAPE        R2  
0   0.317875  0.766548  
1   0.327174  0.943330  
2   0.368343  0.841933  
3   0.354829  0.593108  
4   0.285757  0.931426  
5   0.454653  0.783175  
6   0.584503  0.383765  
7   0.182306  0.967303  
8   0.360820  0.267050  
9   0.315947  0.874721  
10  0.399846  0.507424  
11  0.202527  0.592406  
12  0.327610  0.955578  
13  0.363048  0.925196  
14  0.379036  0.787249  
15  0.254269  0.851325  
16  0.244951  0.993074  
17  0.405495  0.717762  
18  0.251376  0.974041  
19  0.429396  0.946978  
20  0.289273  0.852513  
21  0.473352  0.466255  
22  0.360491  0.654020  

Subset of countries with overnights¶

In [370]:
# Load data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
cool = cool[~cool['country'].isin(['Russian Federation'])]

# Type conversions
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)

# Add Schengen and Euro info
schengen_entry_year = {
    'Austria': 1995, 'Belgium': 1995, 'Czech Republic': 2007, 'Denmark': 2001,
    'Finland': 1996, 'France': 1995, 'Germany': 1995, 'Hungary': 2007,
    'Italy': 1997, 'Netherlands': 1995, 'Norway': 2001, 'Poland': 2007,
    'Portugal': 1995, 'Slovakia': 2007, 'Slovenia': 2007, 'Spain': 1995,
    'Sweden': 2001, 'Switzerland': 2008, 'Romania': 2024,
}

cool['schengen_member'] = cool.apply(lambda row: int(row['year'] >= schengen_entry_year.get(row['country'], np.inf)), axis=1).astype('int8')

# Add Google Trends
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Log transform + lags
cool = cool.sort_values(['country', 'date']).drop_duplicates(subset=['country', 'date']).reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals'])
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])

cool['overnights_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

month_names = {
    1: 'January', 2: 'February', 3: 'March', 4: 'April',
    5: 'May', 6: 'June', 7: 'July', 8: 'August',
    9: 'September', 10: 'October', 11: 'November', 12: 'December'
}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool.drop(columns=['month_name']), ohe_month], axis=1)

cool['overnights_lag_1'] = cool.groupby('country')['arrivals'].shift(1)
cool['overnights_lag_3'] = cool.groupby('country')['arrivals'].shift(3)
cool['overnights_lag_6'] = cool.groupby('country')['arrivals'].shift(6)
cool['overnights_lag_12'] = cool.groupby('country')['arrivals'].shift(12)
cool['cpi_lag_1'] = cool.groupby('country')['cpi'].shift(1)
cool['unemp_rate_lag_3'] = cool.groupby('country')['unemployment_rate'].shift(3)
cool['google_trends_lag_1'] = cool.groupby('country')['google_trends'].shift(1)
cool['google_trends_lag_3'] = cool.groupby('country')['google_trends'].shift(3)

cool = cool[cool['date'] >= '2001-01-01']
cool_sub2 = cool[cool['country'].isin(['Germany', 'Italy', 'Poland', 'Slovenia', 'Switzerland','USA'])].copy()
cool_sub2 = cool_sub2[cool_sub2['overnights_next_month'].notna()].copy()

cool_sub2['country_encoded'] = LabelEncoder().fit_transform(cool_sub2['country'])
month_cols = [col for col in cool_sub2.columns if col.startswith('month_')]

X_numeric = cool_sub2[[
    'unemp_rate_lag_3', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted'] + month_cols].values

X_country_array = cool_sub2['country_encoded'].astype('int32').values
y = cool_sub2['overnights_next_month'].values

train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool_sub2['date'] <= train_end
val_mask = (cool_sub2['date'] > train_end) & (cool_sub2['date'] <= val_end)
test_mask = cool_sub2['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask.to_numpy()])
X_num_val = scaler.transform(X_numeric[val_mask.to_numpy()])
X_num_test = scaler.transform(X_numeric[test_mask.to_numpy()])
X_numeric_scaled_all = scaler.transform(X_numeric)

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]
In [371]:
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
embedding = Embedding(input_dim=len(np.unique(X_country_array)), output_dim=6)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)

model0 = Model(inputs=[input_numeric, input_country], outputs=output)
model0.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
history = model0.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16)

model0.summary()
Epoch 1/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 15s 23ms/step - loss: 96.2673 - val_loss: 83.4533
Epoch 2/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 82.6436 - val_loss: 67.1095
Epoch 3/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 19ms/step - loss: 67.3150 - val_loss: 48.3680
Epoch 4/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 49.8187 - val_loss: 32.1284
Epoch 5/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 30.1898 - val_loss: 14.7346
Epoch 6/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 15.6023 - val_loss: 6.2273
Epoch 7/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 7.5336 - val_loss: 3.6870
Epoch 8/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 4.6136 - val_loss: 2.5168
Epoch 9/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 3s 37ms/step - loss: 4.1666 - val_loss: 2.2894
Epoch 10/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 3.6290 - val_loss: 2.3527
Epoch 11/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 3.4119 - val_loss: 2.3391
Epoch 12/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 4s 51ms/step - loss: 3.8713 - val_loss: 2.3436
Epoch 13/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 4s 18ms/step - loss: 3.3307 - val_loss: 2.2364
Epoch 14/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 14ms/step - loss: 2.7160 - val_loss: 2.2157
Epoch 15/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 3.0357 - val_loss: 2.1943
Epoch 16/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 3.1373 - val_loss: 2.1400
Epoch 17/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 16ms/step - loss: 2.4436 - val_loss: 2.1681
Epoch 18/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 21ms/step - loss: 2.6754 - val_loss: 2.1685
Epoch 19/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 2.3551 - val_loss: 2.0866
Epoch 20/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 2.6772 - val_loss: 2.0352
Epoch 21/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 13ms/step - loss: 2.1725 - val_loss: 1.9905
Epoch 22/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 15ms/step - loss: 2.4136 - val_loss: 2.1027
Epoch 23/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 2.3841 - val_loss: 2.1134
Epoch 24/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 2.0762 - val_loss: 2.1707
Epoch 25/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 2.2028 - val_loss: 1.9971
Epoch 26/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.7626 - val_loss: 2.0448
Epoch 27/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 1.9955 - val_loss: 2.0780
Epoch 28/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.9399 - val_loss: 2.0611
Epoch 29/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.7777 - val_loss: 2.0006
Epoch 30/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.9445 - val_loss: 1.9586
Epoch 31/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.7657 - val_loss: 2.0208
Epoch 32/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.9710 - val_loss: 2.0420
Epoch 33/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.6780 - val_loss: 1.9459
Epoch 34/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.6586 - val_loss: 1.9306
Epoch 35/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.5943 - val_loss: 1.9734
Epoch 36/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.4743 - val_loss: 2.0016
Epoch 37/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 24ms/step - loss: 1.7330 - val_loss: 1.9821
Epoch 38/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 3s 20ms/step - loss: 1.6734 - val_loss: 1.9506
Epoch 39/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.6656 - val_loss: 1.8936
Epoch 40/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - loss: 1.4171 - val_loss: 1.9393
Epoch 41/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.3501 - val_loss: 1.9552
Epoch 42/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.5745 - val_loss: 2.0136
Epoch 43/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.6027 - val_loss: 1.9249
Epoch 44/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.4072 - val_loss: 1.9168
Epoch 45/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2585 - val_loss: 1.9042
Epoch 46/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 1.3201 - val_loss: 1.9343
Epoch 47/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 15ms/step - loss: 1.3753 - val_loss: 1.9005
Epoch 48/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.4154 - val_loss: 1.8944
Epoch 49/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.2585 - val_loss: 1.9297
Epoch 50/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.1731 - val_loss: 1.9718
Epoch 51/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.3242 - val_loss: 1.9290
Epoch 52/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2209 - val_loss: 1.9211
Epoch 53/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2861 - val_loss: 1.9466
Epoch 54/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.2940 - val_loss: 1.8893
Epoch 55/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.2450 - val_loss: 1.9128
Epoch 56/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2355 - val_loss: 1.9380
Epoch 57/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1741 - val_loss: 1.9609
Epoch 58/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2285 - val_loss: 1.9153
Epoch 59/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.1282 - val_loss: 1.9195
Epoch 60/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1603 - val_loss: 1.9625
Epoch 61/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1549 - val_loss: 1.8665
Epoch 62/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1905 - val_loss: 1.9088
Epoch 63/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.0087 - val_loss: 1.9052
Epoch 64/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 21ms/step - loss: 0.9954 - val_loss: 1.9662
Epoch 65/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 12ms/step - loss: 1.1956 - val_loss: 1.9599
Epoch 66/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - loss: 1.1244 - val_loss: 1.8904
Epoch 67/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.1630 - val_loss: 1.9382
Epoch 68/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1095 - val_loss: 1.9623
Epoch 69/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.0993 - val_loss: 1.9723
Epoch 70/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.0702 - val_loss: 1.9438
Epoch 71/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1175 - val_loss: 1.9137
Epoch 72/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0992 - val_loss: 1.9411
Epoch 73/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.0389 - val_loss: 2.0290
Epoch 74/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 0.9781 - val_loss: 1.9440
Epoch 75/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 0.9713 - val_loss: 1.9440
Epoch 76/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - loss: 1.1828 - val_loss: 1.9852
Epoch 77/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 1.0950 - val_loss: 1.9141
Epoch 78/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 3s 29ms/step - loss: 1.0716 - val_loss: 1.9159
Epoch 79/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - loss: 1.0330 - val_loss: 1.9094
Epoch 80/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1201 - val_loss: 1.8987
Epoch 81/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0059 - val_loss: 1.9590
Epoch 82/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.1762 - val_loss: 1.9424
Epoch 83/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 2s 15ms/step - loss: 1.0011 - val_loss: 1.9317
Epoch 84/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.1152 - val_loss: 1.9488
Epoch 85/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 1.0686 - val_loss: 2.0011
Epoch 86/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.0379 - val_loss: 1.9176
Epoch 87/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0014 - val_loss: 1.9522
Epoch 88/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9960 - val_loss: 1.9251
Epoch 89/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.9819 - val_loss: 1.9313
Epoch 90/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0863 - val_loss: 1.9259
Epoch 91/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0982 - val_loss: 1.9510
Epoch 92/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1539 - val_loss: 1.9678
Epoch 93/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9057 - val_loss: 1.9402
Epoch 94/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 0.9511 - val_loss: 1.9875
Epoch 95/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9574 - val_loss: 1.9802
Epoch 96/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.1070 - val_loss: 1.9393
Epoch 97/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - loss: 1.0748 - val_loss: 1.9470
Epoch 98/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.1290 - val_loss: 1.9775
Epoch 99/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.1856 - val_loss: 1.9335
Epoch 100/100
72/72 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.9632 - val_loss: 1.9449
Model: "functional_42"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_42        │ (None, 1, 6)      │         36 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_42          │ (None, 6)         │          0 │ embedding_42[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_42      │ (None, 31)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_42[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_126 (Dense)   │ (None, 64)        │      2,048 │ concatenate_42[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_126[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_84          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_127 (Dense)   │ (None, 32)        │      2,080 │ dropout_84[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_127[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_85          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_128 (Dense)   │ (None, 1)         │         33 │ dropout_85[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 13,361 (52.20 KB)
 Trainable params: 4,389 (17.14 KB)
 Non-trainable params: 192 (768.00 B)
 Optimizer params: 8,780 (34.30 KB)
In [372]:
# Baseline predictions
baseline_preds = model0.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model0.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(permuted_mse - baseline_mse)

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model0.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = mean_squared_error(y_test, month_preds) - baseline_mse
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Model 1: Overnights, subset of high-volume countries (2024)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
9/9 ━━━━━━━━━━━━━━━━━━━━ 1s 57ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 42ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 24ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 28ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 1s 60ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 

Model Evaluation¶

In [373]:
## Model evalauation
# Predict and flatten
train_preds = model0.predict([X_num_train, X_cat_train]).flatten()
test_preds = model0.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse0 = np.sqrt(train_mse)
test_rmse0 = np.sqrt(test_mse)

#MAE
train_mae0 = mean_absolute_error(y_train, train_preds)
test_mae0 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape0 = mean_absolute_percentage_error(y_train, train_preds)
test_mape0 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_0 = r2_score(y_train, train_preds)
test_r2_0 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse0:.4f}, MAE: {train_mae0:.4f}, MAPE: {train_mape0:.4f}, R²: {train_r2_0:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse0:.4f}, MAE: {test_mae0:.4f}, MAPE: {test_mape0:.4f}, R²: {test_r2_0:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
36/36 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step
9/9 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step
Train MSE: 0.0881, RMSE: 0.2968, MAE: 0.2366, MAPE: 0.0257, R²: 0.9726
Test MSE: 0.4140, RMSE: 0.6434, MAE: 0.4859, MAPE: 0.0480, R²: 0.8591
In [374]:
# Global trend plot (total actual vs predicted per month)

y_pred_all = model0.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool_sub2.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Overnights Subset(2024)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
54/54 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step
In [375]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals (2024)', fontsize=16, y=1.02)
plt.show()
In [376]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
       country           MSE           RMSE            MAE      MAPE        R2
0      Germany  4.024194e+10  200603.944104  121130.643923  0.450907  0.560532
1        Italy  7.848145e+08   28014.540743   16760.034107  0.388984  0.910066
2       Poland  5.813232e+09   76244.554155   43257.363849  0.408261  0.572232
3     Slovenia  3.027006e+09   55018.231569   37428.719706  0.524595  0.755425
4  Switzerland  1.977036e+08   14060.710910    7427.436933  0.330616  0.659180
5          USA  8.089627e+08   28442.269892   19675.270503  0.460863  0.580131
In [ ]:
 

Non schengen with overnights¶

In [377]:
# Load data
cool = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/df_top_cool.csv")
cool.replace('..', np.nan, inplace=True)
cool['date'] = pd.to_datetime(cool[['year', 'month']].assign(day=1))
cool = cool[~cool['country'].isin(['Russian Federation'])]

# Type conversions
cool['year'] = pd.to_numeric(cool['year'], downcast='integer', errors='coerce')
cool['month'] = pd.to_numeric(cool['month'], downcast='integer', errors='coerce')
cool['arrivals'] = pd.to_numeric(cool['arrivals'], errors='coerce')
cool['overnights'] = pd.to_numeric(cool['overnights'], errors='coerce')
cool['unemployment_rate'] = pd.to_numeric(cool['unemployment_rate'], errors='coerce')
cool['exchange_rate'] = pd.to_numeric(cool['exchange_rate'], errors='coerce')
cool['cpi'] = pd.to_numeric(cool['cpi'], errors='coerce')
cool['eu_member'] = cool['eu_member'].astype(int)

# Add Schengen and Euro info
schengen_entry_year = {
    'Austria': 1995, 'Belgium': 1995, 'Czech Republic': 2007, 'Denmark': 2001,
    'Finland': 1996, 'France': 1995, 'Germany': 1995, 'Hungary': 2007,
    'Italy': 1997, 'Netherlands': 1995, 'Norway': 2001, 'Poland': 2007,
    'Portugal': 1995, 'Slovakia': 2007, 'Slovenia': 2007, 'Spain': 1995,
    'Sweden': 2001, 'Switzerland': 2008, 'Romania': 2024,
}

cool['schengen_member'] = cool.apply(lambda row: int(row['year'] >= schengen_entry_year.get(row['country'], np.inf)), axis=1).astype('int8')

# Add Google Trends
td = pd.read_csv("C:/Users/Korisnik/Desktop/Škola/THESIS ideas/multiTimeline.csv")
td['date'] = pd.to_datetime(td['date']).dt.to_period('M').dt.to_timestamp()
td_long = td.melt(id_vars='date', var_name='country', value_name='google_trends')
cool['date'] = pd.to_datetime(cool['date']).dt.to_period('M').dt.to_timestamp()
cool = cool.merge(td_long, on=['date', 'country'], how='left')
cool['google_trends'] = cool['google_trends'].fillna(-1)

# Log transform + lags
cool = cool.sort_values(['country', 'date']).drop_duplicates(subset=['country', 'date']).reset_index(drop=True)
cool['arrivals'] = np.log1p(cool['arrivals'])
cool['overnights'] = np.log1p(cool['overnights'])
cool['exchange_rate'] = np.log1p(cool['exchange_rate'])
cool['cpi'] = np.log1p(cool['cpi'])

cool['overnights_next_month'] = cool.groupby('country')['arrivals'].shift(-1)

month_names = {
    1: 'January', 2: 'February', 3: 'March', 4: 'April',
    5: 'May', 6: 'June', 7: 'July', 8: 'August',
    9: 'September', 10: 'October', 11: 'November', 12: 'December'
}
cool['month_name'] = cool['month'].map(month_names)
ohe_month = pd.get_dummies(cool['month_name'], prefix='month').astype(int)
cool = pd.concat([cool.drop(columns=['month_name']), ohe_month], axis=1)

cool['overnights_lag_1'] = cool.groupby('country')['arrivals'].shift(1)
cool['overnights_lag_3'] = cool.groupby('country')['arrivals'].shift(3)
cool['overnights_lag_6'] = cool.groupby('country')['arrivals'].shift(6)
cool['overnights_lag_12'] = cool.groupby('country')['arrivals'].shift(12)
cool['cpi_lag_1'] = cool.groupby('country')['cpi'].shift(1)
cool['unemp_rate_lag_3'] = cool.groupby('country')['unemployment_rate'].shift(3)
cool['google_trends_lag_1'] = cool.groupby('country')['google_trends'].shift(1)
cool['google_trends_lag_3'] = cool.groupby('country')['google_trends'].shift(3)

cool = cool[cool['date'] >= '2001-01-01']
cool_sub2 = cool[cool['country'].isin(['Ireland','Bosnia and Herzegovina', 'USA', 'United Kingdom', 'Canada'])].copy()
cool_sub2 = cool_sub2[cool_sub2['overnights_next_month'].notna()].copy()

cool_sub2['country_encoded'] = LabelEncoder().fit_transform(cool_sub2['country'])
month_cols = [col for col in cool_sub2.columns if col.startswith('month_')]

X_numeric = cool_sub2[[
    'unemp_rate_lag_3', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted'] + month_cols].values

X_country_array = cool_sub2['country_encoded'].astype('int32').values
y = cool_sub2['overnights_next_month'].values

train_end = pd.Timestamp("2016-12-31")
val_end = pd.Timestamp("2020-12-31")
train_mask = cool_sub2['date'] <= train_end
val_mask = (cool_sub2['date'] > train_end) & (cool_sub2['date'] <= val_end)
test_mask = cool_sub2['date'] > val_end

scaler = StandardScaler()
X_num_train = scaler.fit_transform(X_numeric[train_mask.to_numpy()])
X_num_val = scaler.transform(X_numeric[val_mask.to_numpy()])
X_num_test = scaler.transform(X_numeric[test_mask.to_numpy()])
X_numeric_scaled_all = scaler.transform(X_numeric)

X_cat_train = X_country_array[train_mask.to_numpy()]
X_cat_val = X_country_array[val_mask.to_numpy()]
X_cat_test = X_country_array[test_mask.to_numpy()]
y_train = y[train_mask.to_numpy()]
y_val = y[val_mask.to_numpy()]
y_test = y[test_mask.to_numpy()]
In [378]:
input_numeric = Input(shape=(X_num_train.shape[1],), name="numeric_input")
input_country = Input(shape=(1,), dtype='int32', name="country_input")
embedding = Embedding(input_dim=len(np.unique(X_country_array)), output_dim=6)(input_country)
embedding_flat = Flatten()(embedding)
x = Concatenate()([input_numeric, embedding_flat])
x = Dense(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)
output = Dense(1)(x)

model0 = Model(inputs=[input_numeric, input_country], outputs=output)
model0.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

early_stop = EarlyStopping(monitor='val_loss', patience=50, restore_best_weights=True)
history = model0.fit(
    [X_num_train, X_cat_train], y_train,
    validation_data=([X_num_val, X_cat_val], y_val),
    epochs=100, batch_size=16)

model0.summary()
Epoch 1/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 17s 21ms/step - loss: 70.8847 - val_loss: 65.1186
Epoch 2/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 61.7225 - val_loss: 54.1320
Epoch 3/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 53.5805 - val_loss: 43.0327
Epoch 4/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 41.2317 - val_loss: 30.1844
Epoch 5/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 28.8166 - val_loss: 18.6230
Epoch 6/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 17.4525 - val_loss: 9.2107
Epoch 7/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 9.7269 - val_loss: 4.5280
Epoch 8/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 4.8463 - val_loss: 2.5077
Epoch 9/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 3.5921 - val_loss: 2.0433
Epoch 10/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 3.1513 - val_loss: 1.9704
Epoch 11/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 2.9756 - val_loss: 2.1139
Epoch 12/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 2.5745 - val_loss: 1.9515
Epoch 13/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 2.6133 - val_loss: 1.9761
Epoch 14/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 2.5276 - val_loss: 2.0180
Epoch 15/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 2.3871 - val_loss: 1.8424
Epoch 16/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 2.7073 - val_loss: 1.9609
Epoch 17/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 2.0080 - val_loss: 1.8744
Epoch 18/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 15ms/step - loss: 1.9327 - val_loss: 1.9323
Epoch 19/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 2.0990 - val_loss: 1.9120
Epoch 20/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 14ms/step - loss: 2.1032 - val_loss: 1.9193
Epoch 21/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 30ms/step - loss: 1.8576 - val_loss: 1.8397
Epoch 22/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.8819 - val_loss: 1.7557
Epoch 23/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 2.0402 - val_loss: 1.8278
Epoch 24/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.9438 - val_loss: 1.9030
Epoch 25/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.8934 - val_loss: 1.9401
Epoch 26/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.7906 - val_loss: 1.9785
Epoch 27/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 17ms/step - loss: 1.4435 - val_loss: 1.9742
Epoch 28/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.6720 - val_loss: 2.0107
Epoch 29/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.4551 - val_loss: 1.9018
Epoch 30/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.4942 - val_loss: 1.9666
Epoch 31/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.4540 - val_loss: 1.8454
Epoch 32/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.3189 - val_loss: 1.9146
Epoch 33/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.3622 - val_loss: 1.9060
Epoch 34/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.2370 - val_loss: 1.9225
Epoch 35/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 1.4445 - val_loss: 1.8731
Epoch 36/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.3872 - val_loss: 1.9369
Epoch 37/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 1.2616 - val_loss: 1.8875
Epoch 38/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.1634 - val_loss: 1.9163
Epoch 39/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - loss: 1.1789 - val_loss: 1.9048
Epoch 40/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 1.2247 - val_loss: 1.9471
Epoch 41/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 1.2143 - val_loss: 1.9269
Epoch 42/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.1808 - val_loss: 1.8705
Epoch 43/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.2053 - val_loss: 1.8597
Epoch 44/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 1.0917 - val_loss: 1.8741
Epoch 45/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.2388 - val_loss: 1.8228
Epoch 46/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - loss: 1.0954 - val_loss: 1.8820
Epoch 47/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 1.0031 - val_loss: 1.8729
Epoch 48/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 23ms/step - loss: 1.0973 - val_loss: 1.8217
Epoch 49/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.0790 - val_loss: 1.8532
Epoch 50/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 1.1115 - val_loss: 1.8916
Epoch 51/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 1.0503 - val_loss: 1.8744
Epoch 52/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 1.0296 - val_loss: 1.9047
Epoch 53/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.9012 - val_loss: 1.8382
Epoch 54/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 0.8731 - val_loss: 1.8559
Epoch 55/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 0.9681 - val_loss: 1.8877
Epoch 56/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8886 - val_loss: 1.8650
Epoch 57/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 0.9479 - val_loss: 1.8268
Epoch 58/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9344 - val_loss: 1.8583
Epoch 59/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.9196 - val_loss: 1.8765
Epoch 60/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 0.9520 - val_loss: 1.8995
Epoch 61/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.9951 - val_loss: 1.8520
Epoch 62/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 0.9381 - val_loss: 1.9227
Epoch 63/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 0.8610 - val_loss: 1.8910
Epoch 64/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 0.8589 - val_loss: 1.8743
Epoch 65/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.8759 - val_loss: 1.8455
Epoch 66/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8582 - val_loss: 1.9081
Epoch 67/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - loss: 0.8697 - val_loss: 1.9117
Epoch 68/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step - loss: 0.9366 - val_loss: 1.9303
Epoch 69/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step - loss: 0.9348 - val_loss: 1.9223
Epoch 70/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.9144 - val_loss: 1.8986
Epoch 71/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 0.8233 - val_loss: 1.9130
Epoch 72/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.8267 - val_loss: 1.9244
Epoch 73/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 0.8641 - val_loss: 1.9100
Epoch 74/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 13ms/step - loss: 0.8258 - val_loss: 1.9218
Epoch 75/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.9032 - val_loss: 1.9354
Epoch 76/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 11ms/step - loss: 0.8490 - val_loss: 1.9294
Epoch 77/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 0.7667 - val_loss: 1.8919
Epoch 78/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.7664 - val_loss: 1.8976
Epoch 79/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.8923 - val_loss: 1.9398
Epoch 80/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.9168 - val_loss: 1.8888
Epoch 81/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.8413 - val_loss: 1.8984
Epoch 82/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.8013 - val_loss: 1.9316
Epoch 83/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.8196 - val_loss: 1.9271
Epoch 84/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 12ms/step - loss: 0.7840 - val_loss: 1.9019
Epoch 85/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7197 - val_loss: 1.9103
Epoch 86/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.8005 - val_loss: 1.9403
Epoch 87/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 0.7906 - val_loss: 1.9339
Epoch 88/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.8263 - val_loss: 1.8825
Epoch 89/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.7933 - val_loss: 1.9429
Epoch 90/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step - loss: 0.7287 - val_loss: 1.9740
Epoch 91/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.7661 - val_loss: 1.9171
Epoch 92/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8285 - val_loss: 1.9287
Epoch 93/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 0s 6ms/step - loss: 0.7490 - val_loss: 1.9288
Epoch 94/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.8355 - val_loss: 1.9520
Epoch 95/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.7089 - val_loss: 1.9161
Epoch 96/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 9ms/step - loss: 0.7353 - val_loss: 1.9557
Epoch 97/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 10ms/step - loss: 0.7541 - val_loss: 2.0245
Epoch 98/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 0.7487 - val_loss: 1.9634
Epoch 99/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - loss: 0.7933 - val_loss: 1.9698
Epoch 100/100
60/60 ━━━━━━━━━━━━━━━━━━━━ 1s 7ms/step - loss: 0.7867 - val_loss: 1.9595
Model: "functional_43"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ country_input       │ (None, 1)         │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ embedding_43        │ (None, 1, 6)      │         30 │ country_input[0]… │
│ (Embedding)         │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ numeric_input       │ (None, 25)        │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ flatten_43          │ (None, 6)         │          0 │ embedding_43[0][… │
│ (Flatten)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ concatenate_43      │ (None, 31)        │          0 │ numeric_input[0]… │
│ (Concatenate)       │                   │            │ flatten_43[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_129 (Dense)   │ (None, 64)        │      2,048 │ concatenate_43[0… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 64)        │        256 │ dense_129[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_86          │ (None, 64)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_130 (Dense)   │ (None, 32)        │      2,080 │ dropout_86[0][0]  │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ batch_normalizatio… │ (None, 32)        │        128 │ dense_130[0][0]   │
│ (BatchNormalizatio… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dropout_87          │ (None, 32)        │          0 │ batch_normalizat… │
│ (Dropout)           │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_131 (Dense)   │ (None, 1)         │         33 │ dropout_87[0][0]  │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 13,343 (52.12 KB)
 Trainable params: 4,383 (17.12 KB)
 Non-trainable params: 192 (768.00 B)
 Optimizer params: 8,768 (34.25 KB)
In [379]:
# Baseline predictions
baseline_preds = model0.predict([X_num_test, X_cat_test]).flatten()
baseline_mse = mean_squared_error(y_test, baseline_preds)

feature_names = [
    'unemployment_rate', 'exchange_rate', 'cpi_lag_1',
    'overnights_lag_1', 'overnights_lag_3', 'overnights_lag_6', 'overnights_lag_12',
    'google_trends_lag_1', 'google_trends_lag_3', 'schengen_member', 'euro_adopted']+month_cols

importances = []
for i in range(X_num_test.shape[1]):
    X_permuted = X_num_test.copy()
    X_permuted[:, i] = np.random.permutation(X_permuted[:, i])
    permuted_preds = model0.predict([X_permuted, X_cat_test]).flatten()
    permuted_mse = mean_squared_error(y_test, permuted_preds)
    importances.append(permuted_mse - baseline_mse)

#Handle grouped month dummies so its not one bar for each month
month_indices = [i for i, f in enumerate(feature_names) if f.startswith('month_')]

if month_indices:
    # Safe grouped permutation
    X_month_permuted = X_num_test.copy()
    row_perm = np.random.permutation(len(X_month_permuted))
    X_month_permuted[:, month_indices] = X_month_permuted[row_perm][:, month_indices]
    month_preds = model0.predict([X_month_permuted, X_cat_test]).flatten()
    month_importance = mean_squared_error(y_test, month_preds) - baseline_mse
else:
    month_importance = 0  # fallback if no month cols present

filtered_names = [f for i, f in enumerate(feature_names) if i not in month_indices]
filtered_importances = [imp for i, imp in enumerate(importances) if i not in month_indices]
feature_names_final = filtered_names + ['month_group']
importances_final = filtered_importances + [month_importance]
sorted_pairs = sorted(zip(importances_final, feature_names_final), reverse=True)
importances_sorted, feature_names_sorted = zip(*sorted_pairs)

plt.figure(figsize=(12, 6))
bars = plt.barh(feature_names_sorted, importances_sorted)
plt.xlabel("Increase in MSE when shuffled")
plt.title("Overnights, Non-Schengen countries (2024)")
plt.gca().invert_yaxis()

# Add labels
for bar in bars:
    width = bar.get_width()
    plt.text(width + 0.001, bar.get_y() + bar.get_height() / 2,
             f"{width:.4f}", va='center')

plt.tight_layout()
plt.show()
8/8 ━━━━━━━━━━━━━━━━━━━━ 1s 58ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 11ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 13ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 7ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step 
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 12ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 10ms/step

Model Evaluation¶

In [380]:
## Model evalauation
# Predict and flatten
train_preds = model0.predict([X_num_train, X_cat_train]).flatten()
test_preds = model0.predict([X_num_test, X_cat_test]).flatten()

# MSE
train_mse = mean_squared_error(y_train, train_preds)
test_mse = mean_squared_error(y_test, test_preds)

# RMSE
train_rmse0 = np.sqrt(train_mse)
test_rmse0 = np.sqrt(test_mse)

#MAE
train_mae0 = mean_absolute_error(y_train, train_preds)
test_mae0 = mean_absolute_error(y_test, test_preds)

# MAPE
train_mape0 = mean_absolute_percentage_error(y_train, train_preds)
test_mape0 = mean_absolute_percentage_error(y_test, test_preds)

# R-squared
train_r2_0 = r2_score(y_train, train_preds)
test_r2_0 = r2_score(y_test, test_preds)

# Print all metrics
print(f"Train MSE: {train_mse:.4f}, RMSE: {train_rmse0:.4f}, MAE: {train_mae0:.4f}, MAPE: {train_mape0:.4f}, R²: {train_r2_0:.4f}")
print(f"Test MSE: {test_mse:.4f}, RMSE: {test_rmse0:.4f}, MAE: {test_mae0:.4f}, MAPE: {test_mape0:.4f}, R²: {test_r2_0:.4f}")

# Plot training history
plt.figure(figsize=(10, 4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
30/30 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step 
Train MSE: 0.0733, RMSE: 0.2707, MAE: 0.2129, MAPE: 0.0270, R²: 0.9715
Test MSE: 0.5642, RMSE: 0.7512, MAE: 0.6040, MAPE: 0.0710, R²: 0.8313
In [381]:
# Global trend plot (total actual vs predicted per month)

y_pred_all = model0.predict([X_numeric_scaled_all, X_country_array]).flatten()
y_actual_all = np.expm1(y)
y_pred_all = np.expm1(y_pred_all)
df_plot = cool_sub2.copy()
df_plot['actual'] = y_actual_all
df_plot['predicted'] = y_pred_all
monthly_totals_all = df_plot.groupby('date')[['actual', 'predicted']].sum()

plt.figure(figsize=(16, 6))
plt.plot(monthly_totals_all.index, monthly_totals_all['actual'], label='Total Actual', linewidth=2)
plt.plot(monthly_totals_all.index, monthly_totals_all['predicted'], label='Total Predicted', linestyle='--', alpha=0.8)
plt.title("Total Monthly Overnights Subset(2024)")
plt.xlabel("Date")
plt.ylabel("Total Overnights")
plt.xticks(rotation=90)
plt.ticklabel_format(style='plain', axis='y')
plt.legend()
plt.tight_layout()
ax = plt.gca()
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=6))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.xlim(monthly_totals_all.index.min(), monthly_totals_all.index.max())
plt.axvline(x=train_end, color='gray', linestyle='--', linewidth=1.5, label='Train/Val Split')
plt.axvline(x=val_end, color='black', linestyle='--', linewidth=1.5, label='Val/Test Split')
plt.text(train_end, plt.ylim()[1]*0.95, 'Training set', rotation=90, ha='right', va='top', color='gray')
plt.text(val_end, plt.ylim()[1]*0.95, 'Testing set', rotation=90, ha='right', va='top', color='black')
plt.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)
plt.show()
45/45 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
In [382]:
# Country-level plots
countries = df_plot['country'].unique()
fig, axes = plt.subplots(len(countries) // 3 + 1, 3, figsize=(18, 3 * (len(countries) // 3 + 1)), sharex=False, sharey=False)

axes = axes.flatten()

for i, country in enumerate(countries):
    country_df = df_plot[df_plot['country'] == country].groupby('date')[['actual', 'predicted']].sum()
    ax = axes[i]
    ax.plot(country_df.index, country_df['actual'], label='Actual')
    ax.plot(country_df.index, country_df['predicted'], label='Predicted', linestyle='--')
    ax.set_title(country)
    
    # Show x-axis ticks every 3 years
    ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.6)

    # Add legend only once
    if i == 0:
        ax.legend()

# Remove unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.tight_layout()
fig.suptitle('Actual vs Predicted Monthly Arrivals (2024)', fontsize=16, y=1.02)
plt.show()
In [383]:
# df_plot: DataFrame with columns ['country', 'date', 'actual', 'predicted']
# List of countries
countries = df_plot['country'].unique()

results = []

for country in countries:
    # Subset data for this country (test set only, if you want)
    country_df = df_plot[(df_plot['country'] == country) & (df_plot['date'] > val_end)]
    
    actual = country_df['actual'].values
    pred = country_df['predicted'].values
    
    if len(actual) == 0:
        continue  # Skip countries with no data in test set
    
    mse = round(mean_squared_error(actual, pred),6)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, pred)
    mape = mean_absolute_percentage_error(actual, pred)
    r2 = r2_score(actual, pred)
    
    results.append({
        'country': country,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'MAPE': mape,
        'R2': r2
    })

# Convert to DataFrame for easy export or display
country_metrics = pd.DataFrame(results)
print(country_metrics)
                  country           MSE          RMSE           MAE      MAPE  \
0  Bosnia and Herzegovina  2.486598e+08  15768.949282  10873.771147  0.449015   
1                  Canada  1.910291e+07   4370.687442   2639.945901  0.562360   
2                 Ireland  3.610469e+07   6008.717519   3796.018935  0.632371   
3                     USA  1.047077e+09  32358.572628  22942.086356  0.565808   
4          United Kingdom  2.106355e+09  45895.044879  29439.136403  0.882562   

         R2  
0  0.779294  
1  0.832437  
2  0.497997  
3  0.456544  
4  0.327133